The purpose of this notebook is to define a dataframe with two column (cell barcodes and annotation) of all cells and clusters that we will include in the final dataset (cluster freeze). The cells to include come from three different sources:
We will fetch the UMAP coords and annotation for (1) and (2) from the Seurat objects we saved in the previous notebook (00-harmonize_seurat_objects.Rmd). For the third case, we will upload intermediate Seurat objects and early levels of the analysis.
library(Seurat)
library(harmony)
library(tidyverse)
library(here)
All the parameters (paths, colors, variables) are encoded in the file “utils_final_clusters.R”:
source(here("scRNA-seq/bin/utils_final_clusters.R"))
# Read
myeloid <- readRDS(path_to_save_myeloid)
myeloid_multi <- readRDS(path_to_multiome_myeloid)
DimPlot(myeloid, group.by = "annotation_20220215", cols = color_palette)
DimPlot(myeloid, group.by = "annotation_figure_1", cols = color_palette)
table(myeloid_multi$assay)
##
## 3P 5P multiome
## 3017 189 407
DimPlot(myeloid_multi, cols = color_palette, split.by = "assay")
# Set up dataframe to plot figure 1
myeloid_multi <- subset(myeloid_multi, assay == "multiome")
myeloid_multi$barcode <- colnames(myeloid_multi)
myeloid_multi$annotation_figure_1 <- NA
myeloid_multi$annotation_20220215 <- "myeloid_multiome"
myeloid_multi$UMAP_1_20220215 <- NA
myeloid_multi$UMAP_2_20220215 <- NA
fig1_df <- myeloid@meta.data
condition1 <- all(selected_cols %in% colnames(myeloid_multi@meta.data))
condition2 <- all(myeloid_multi$assay == "multiome")
if (condition1 & condition2) {
fig1_df <- bind_rows(fig1_df, myeloid_multi@meta.data[, selected_cols])
} else {
stop("Conditions not met!")
}
# Remove
rm(myeloid, myeloid_multi)
# Read
epithelial <- readRDS(path_to_save_epithelial)
epithelial_multi <- readRDS(path_to_multiome_epithelial)
DimPlot(epithelial, group.by = "annotation_20220215", cols = color_palette)
DimPlot(epithelial, group.by = "annotation_figure_1", cols = color_palette)
table(epithelial_multi$assay)
##
## 3P 5P multiome
## 290 1 84
DimPlot(epithelial_multi, cols = color_palette, split.by = "assay")
# Set up dataframe to plot figure 1
epithelial_multi <- subset(epithelial_multi, assay == "multiome")
epithelial_multi$barcode <- colnames(epithelial_multi)
epithelial_multi$annotation_figure_1 <- "epithelial"
epithelial_multi$annotation_20220215 <- "epithelial_multiome"
epithelial_multi$UMAP_1_20220215 <- NA
epithelial_multi$UMAP_2_20220215 <- NA
condition1 <- all(selected_cols %in% colnames(epithelial_multi@meta.data))
condition2 <- all(epithelial_multi$assay == "multiome")
if (condition1 & condition2) {
fig1_df <- bind_rows(
fig1_df,
epithelial@meta.data,
epithelial_multi@meta.data[, selected_cols]
)
} else {
stop("Conditions not met!")
}
# Remove
rm(epithelial, epithelial_multi)
# Read
pdc <- readRDS(path_to_save_pdc)
pdc_multi <- readRDS(path_to_multiome_pdc)
DimPlot(pdc, group.by = "annotation_20220215", cols = color_palette)
DimPlot(pdc, group.by = "annotation_figure_1", cols = color_palette)
DimPlot(pdc_multi, cols = color_palette, split.by = "assay")
table(pdc$hospital)
##
## CIMA Clinic
## 243 79
table(pdc_multi$hospital)
##
## CIMA Clinic Royal London
## 377 164 88
pdc <- subset(pdc, hospital != "Royal London")
pdc_multi <- subset(pdc_multi, hospital != "Royal London")
# Set up dataframe to plot figure 1
pdc_multi <- subset(pdc_multi, assay == "multiome")
pdc_multi$barcode <- colnames(pdc_multi)
pdc_multi$annotation_figure_1 <- "PDC"
pdc_multi$annotation_20220215 <- "PDC_multiome"
pdc_multi$UMAP_1_20220215 <- NA
pdc_multi$UMAP_2_20220215 <- NA
condition1 <- all(selected_cols %in% colnames(pdc_multi@meta.data))
condition2 <- all(pdc_multi$assay == "multiome")
if (condition1 & condition2) {
fig1_df <- bind_rows(
fig1_df,
pdc@meta.data,
pdc_multi@meta.data[, selected_cols]
)
} else {
stop("Conditions not met!")
}
# Remove
rm(pdc, pdc_multi)
# Read
fdc <- readRDS(path_to_save_fdc)
fdc_multi <- readRDS(path_to_multiome_fdc)
fdc_proliferative <- readRDS(path_to_fdc_proliferative)
DimPlot(fdc, group.by = "annotation_20220215", cols = color_palette)
DimPlot(fdc, group.by = "annotation_figure_1", cols = color_palette)
table(fdc_multi$assay)
##
## 3P 5P multiome
## 1508 26 54
DimPlot(fdc_multi, cols = color_palette, split.by = "assay")
table(fdc_proliferative$assay)
##
## 3P
## 223
table(fdc$hospital)
##
## CIMA Clinic
## 908 18
fdc <- subset(fdc, hospital != "Royal London")
# Set up dataframe to plot figure 1
fdc_multi <- subset(fdc_multi, assay == "multiome")
fdc_multi$barcode <- colnames(fdc_multi)
fdc_multi$annotation_figure_1 <- "FDC"
fdc_multi$annotation_20220215 <- "FDC_multiome"
fdc_proliferative$annotation_figure_1 <- "cycling FDC"
fdc_proliferative$annotation_20220215 <- "cycling FDC"
fdc_multi$UMAP_1_20220215 <- NA
fdc_multi$UMAP_2_20220215 <- NA
fdc_proliferative$UMAP_1_20220215 <- NA
fdc_proliferative$UMAP_2_20220215 <- NA
condition1 <- all(selected_cols %in% colnames(fdc_multi@meta.data))
condition2 <- all(fdc_multi$assay == "multiome")
condition3 <- all(selected_cols %in% colnames(fdc_proliferative@meta.data))
if (condition1 & condition2 & condition3) {
fig1_df <- bind_rows(
fig1_df,
fdc@meta.data,
fdc_multi@meta.data[, selected_cols],
fdc_proliferative@meta.data[, selected_cols],
)
} else {
stop("Conditions not met!")
}
# Remove
rm(fdc, fdc_multi, fdc_proliferative)
Note that, because we have very few preB cells, we couldn’t stritify them into meaningful clusters. Thus, all of them are labeled as “preB”:
# Read
preB <- readRDS(path_to_save_preB)
DimPlot(preB, group.by = "annotation_20220215", cols = color_palette)
DimPlot(preB, group.by = "annotation_figure_1", cols = color_palette)
table(preB$assay)
##
## 3P multiome
## 68 2
table(preB$hospital)
##
## CIMA Clinic
## 69 1
DimPlot(preB, cols = color_palette, split.by = "hospital")
preB <- subset(preB, hospital != "Royal London")
# Set up dataframe to plot figure 1
condition1 <- all(selected_cols %in% colnames(preB@meta.data))
if (condition1) {
fig1_df <- bind_rows(fig1_df, preB@meta.data[, selected_cols])
} else {
stop("Conditions not met!")
}
# Remove
rm(preB)
# Read
preT <- readRDS(path_to_save_preT)
DimPlot(preT, group.by = "annotation_20220215", cols = color_palette)
DimPlot(preT, group.by = "annotation_figure_1", cols = color_palette)
table(preT$assay)
##
## 3P
## 12
table(preT$hospital)
##
## CIMA
## 12
DimPlot(preT, cols = color_palette, split.by = "hospital")
preT <- subset(preT, hospital != "Royal London")
# Set up dataframe to plot figure 1
condition1 <- all(selected_cols %in% colnames(preT@meta.data))
if (condition1) {
fig1_df <- bind_rows(fig1_df, preT@meta.data[, selected_cols])
} else {
stop("Conditions not met!")
}
# Remove
rm(preT)
All the cell types hereafter included multiome cells, so we only load the last object with all cells.
# Read
ilc_nk <- readRDS(path_to_save_ilc_nk)
DimPlot(ilc_nk, group.by = "annotation_20220215", cols = color_palette)
DimPlot(ilc_nk, group.by = "annotation_figure_1", cols = color_palette)
table(ilc_nk$assay)
##
## 3P multiome
## 473 192
table(ilc_nk$hospital)
##
## CIMA Clinic
## 586 79
DimPlot(ilc_nk, cols = color_palette, split.by = "hospital")
# Set up dataframe to plot figure 1
condition1 <- all(selected_cols %in% colnames(ilc_nk@meta.data))
if (condition1) {
fig1_df <- bind_rows(fig1_df, ilc_nk@meta.data[, selected_cols])
} else {
stop("Conditions not met!")
}
# Remove
rm(ilc_nk)
# Read
cd8 <- readRDS(path_to_save_cd8)
DimPlot(cd8, group.by = "annotation_20220215", cols = color_palette)
DimPlot(cd8, group.by = "annotation_figure_1", cols = color_palette)
table(cd8$assay)
##
## 3P multiome
## 8203 1757
table(cd8$hospital)
##
## CIMA Clinic
## 8757 1203
DimPlot(cd8, cols = color_palette, split.by = "hospital")
# Set up dataframe to plot figure 1
condition1 <- all(selected_cols %in% colnames(cd8@meta.data))
if (condition1) {
fig1_df <- bind_rows(fig1_df, cd8@meta.data[, selected_cols])
} else {
stop("Conditions not met!")
}
# Remove
rm(cd8)
# Read
cd4 <- readRDS(path_to_save_cd4)
cd4_proli <- readRDS(path_to_proliferative_cd4_t)
DimPlot(cd4, group.by = "annotation_20220215", cols = color_palette)
DimPlot(cd4, group.by = "annotation_figure_1", cols = color_palette)
table(cd4$assay)
##
## 3P multiome
## 39834 12473
table(cd4$hospital)
##
## CIMA Clinic
## 41222 11085
DimPlot(cd4, cols = color_palette, split.by = "hospital")
table(cd4_proli$assay)
##
## 3P multiome
## 1677 183
table(cd4_proli$hospital)
##
## CIMA Clinic
## 1709 151
DimPlot(cd4_proli, cols = color_palette, split.by = "hospital")
# Set up dataframe to plot figure 1
cd4_proli$annotation_figure_1 <- "cycling T"
cd4_proli$annotation_20220215 <- "cycling T"
cd4_proli$UMAP_1_20220215 <- NA
cd4_proli$UMAP_2_20220215 <- NA
condition1 <- all(selected_cols %in% colnames(cd4@meta.data))
condition2 <- all(selected_cols %in% colnames(cd4_proli@meta.data))
if (condition1 & condition2) {
fig1_df <- bind_rows(
fig1_df,
cd4@meta.data,
cd4_proli@meta.data[, selected_cols]
)
} else {
stop("Conditions not met!")
}
# Remove
rm(cd4, cd4_proli)
The challenge with all the Seurat objects coming from the B cell lineage is that it also includes cells that belong to other cell types/states. For instance in the PC object, we have cells from GCBC and NBC/MBC. Hence, we will subset them to avoid duplication:
# Read
pc <- readRDS(path_to_save_pc)
DimPlot(pc, group.by = "annotation_20220215", cols = color_palette)
DimPlot(pc, group.by = "annotation_figure_1", cols = color_palette)
pc <- subset(pc, annotation_figure_1 == "PC")
table(pc$assay)
##
## 3P multiome
## 7252 1836
table(pc$hospital)
##
## CIMA Clinic
## 7598 1490
DimPlot(pc, cols = color_palette, split.by = "hospital")
# Set up dataframe to plot figure 1
condition1 <- all(selected_cols %in% colnames(pc@meta.data))
if (condition1) {
fig1_df <- bind_rows(fig1_df, pc@meta.data)
} else {
stop("Conditions not met!")
}
# Remove
rm(pc)
# Read
nbc_mbc <- readRDS(path_to_save_nbc_mbc)
DimPlot(nbc_mbc, group.by = "annotation_20220215", cols = color_palette)
DimPlot(nbc_mbc, group.by = "annotation_figure_1", cols = color_palette)
nbc_mbc <- subset(nbc_mbc, annotation_level_1 == "NBC_MBC")
table(nbc_mbc$assay)
##
## 3P multiome
## 86563 25915
table(nbc_mbc$hospital)
##
## CIMA Clinic
## 90908 21570
DimPlot(nbc_mbc, cols = color_palette, split.by = "hospital")
# Set up dataframe to plot figure 1
condition1 <- all(selected_cols %in% colnames(nbc_mbc@meta.data))
if (condition1) {
fig1_df <- bind_rows(fig1_df, nbc_mbc@meta.data)
} else {
stop("Conditions not met!")
}
# Remove
rm(nbc_mbc)
# Read
gcbc <- readRDS(path_to_save_gcbc)
DimPlot(gcbc, group.by = "annotation_20220215", cols = color_palette)
DimPlot(gcbc, group.by = "annotation_figure_1", cols = color_palette)
table(gcbc$assay)
##
## 3P multiome
## 61724 10450
table(gcbc$hospital)
##
## CIMA Clinic
## 69661 2513
DimPlot(gcbc, cols = color_palette, split.by = "hospital")
# Set up dataframe to plot figure 1
condition1 <- all(selected_cols %in% colnames(gcbc@meta.data))
if (condition1) {
fig1_df <- bind_rows(fig1_df, gcbc@meta.data)
} else {
stop("Conditions not met!")
}
# Remove
rm(gcbc)
# Duplicated cells?
any(duplicated(fig1_df$barcode))
## [1] FALSE
# Lingering cells from King et al.?
any(fig1_df$hospital == "Royal London")
## [1] FALSE
saveRDS(fig1_df, path_to_save_umap_df_rds)
write_delim(
fig1_df,
file = path_to_save_umap_df_csv,
delim = ";",
col_names = TRUE
)
sessionInfo()
## R version 4.0.1 (2020-06-06)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Red Hat Enterprise Linux Server release 6.7 (Santiago)
##
## Matrix products: default
## BLAS: /apps/R/4.0.1/lib64/R/lib/libRblas.so
## LAPACK: /apps/R/4.0.1/lib64/R/lib/libRlapack.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=es_ES.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=es_ES.UTF-8 LC_MESSAGES=en_US.UTF-8 LC_PAPER=es_ES.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] here_1.0.1 forcats_0.5.1 stringr_1.4.0 dplyr_1.0.4 purrr_0.3.4 readr_1.4.0 tidyr_1.1.2 tibble_3.0.6 ggplot2_3.3.3 tidyverse_1.3.0 harmony_0.1.0 Rcpp_1.0.6 SeuratObject_4.0.0 Seurat_4.0.0 BiocStyle_2.16.1
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.2.1 plyr_1.8.6 igraph_1.2.6 lazyeval_0.2.2 splines_4.0.1 listenv_0.8.0 scattermore_0.7 digest_0.6.27 htmltools_0.5.1.1 fansi_0.4.2 magrittr_2.0.1 tensor_1.5 cluster_2.1.0 ROCR_1.0-11 globals_0.14.0 modelr_0.1.8 matrixStats_0.58.0 colorspace_2.0-0 rvest_0.3.6 ggrepel_0.9.1 haven_2.3.1 xfun_0.21 crayon_1.4.1 jsonlite_1.7.2 spatstat_1.64-1 spatstat.data_2.0-0 survival_3.2-3 zoo_1.8-8 glue_1.4.2 polyclip_1.10-0 gtable_0.3.0 leiden_0.3.7 future.apply_1.7.0 abind_1.4-5 scales_1.1.1 DBI_1.1.1 miniUI_0.1.1.1 viridisLite_0.3.0 xtable_1.8-4 reticulate_1.18 htmlwidgets_1.5.3 httr_1.4.2 RColorBrewer_1.1-2 ellipsis_0.3.1 ica_1.0-2 farver_2.0.3 pkgconfig_2.0.3 sass_0.3.1 uwot_0.1.10 dbplyr_2.1.0 deldir_0.2-10 utf8_1.1.4 tidyselect_1.1.0 labeling_0.4.2 rlang_0.4.10
## [57] reshape2_1.4.4 later_1.1.0.1 munsell_0.5.0 cellranger_1.1.0 tools_4.0.1 cli_2.3.1 generics_0.1.0 broom_0.7.4 ggridges_0.5.3 evaluate_0.14 fastmap_1.1.0 yaml_2.2.1 goftest_1.2-2 knitr_1.31 fs_1.5.0 fitdistrplus_1.1-3 RANN_2.6.1 pbapply_1.4-3 future_1.21.0 nlme_3.1-148 mime_0.10 xml2_1.3.2 compiler_4.0.1 rstudioapi_0.13 plotly_4.9.3 png_0.1-7 spatstat.utils_2.0-0 reprex_1.0.0 bslib_0.2.4 stringi_1.5.3 highr_0.8 ps_1.5.0 lattice_0.20-41 Matrix_1.2-18 vctrs_0.3.6 pillar_1.5.0 lifecycle_1.0.0 BiocManager_1.30.10 lmtest_0.9-38 jquerylib_0.1.3 RcppAnnoy_0.0.18 data.table_1.14.0 cowplot_1.1.1 irlba_2.3.3 httpuv_1.5.5 patchwork_1.1.1 R6_2.5.0 bookdown_0.21 promises_1.2.0.1 KernSmooth_2.23-17 gridExtra_2.3 parallelly_1.23.0 codetools_0.2-16 MASS_7.3-51.6 assertthat_0.2.1 rprojroot_2.0.2
## [113] withr_2.4.1 sctransform_0.3.2 mgcv_1.8-31 parallel_4.0.1 hms_1.0.0 grid_4.0.1 rpart_4.1-15 rmarkdown_2.7 Rtsne_0.15 shiny_1.6.0 lubridate_1.7.9.2